#!/usr/bin/env python
# coding=utf-8
# __author__ = 'Yunchao Ling'


def get_virus_type(filepath: str) -> dict:
    """Build a family-name -> virus-type lookup from a facet-pivot JSON file.

    The file is expected to hold a JSON object with the nested keys
    ``facet_counts`` -> ``facet_pivot`` -> ``<pivot key>`` (this looks like a
    Solr facet-pivot response -- confirm against whatever produced the file).
    Each top-level pivot entry supplies the virus type in ``value`` and its
    families in ``pivot``; every family ``value`` has the form
    ``"<name>, taxid:<id>"`` and only the ``<name>`` part is kept.

    Args:
        filepath: Path to the JSON file.

    Returns:
        A dict mapping each family name to the ``value`` of the top-level
        entry it was listed under. If the same family name appears under
        several entries, the last one wins.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If the expected top-level keys, or a ``value`` key, are
            missing.
    """
    import json

    pivot_key = (
        "VirusL0_s,VirusL1_s,VirusL2_s,VirusL3_s,VirusL4_s,"
        "VirusL5_s,VirusL6_s,VirusL7_s,VirusL8_s"
    )

    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(filepath, "r", encoding="utf-8") as load_f:
        load_dict = json.load(load_f)
    virus_types = load_dict["facet_counts"]["facet_pivot"][pivot_key]

    result = {}
    for item in virus_types:
        virus_type = item["value"]
        # An entry without children may carry no "pivot" key (or a null one);
        # treat that as "no families" instead of aborting the whole load.
        for family in item.get("pivot") or []:
            family_name = family["value"].split(", taxid:")[0]
            result[family_name] = virus_type
    return result


def check_no_type(filepath: str, virus_type: dict) -> None:
    """Report CSV rows whose family is blank or absent from ``virus_type``.

    The first row of the CSV is taken as the header and must provide the
    ``Family`` and ``Species`` columns. For each following row, one line is
    printed to stdout when the row cannot be classified:

    * ``<Species>: No family`` when the ``Family`` cell is empty;
    * ``<Species>: Not in list`` when the family is not a key of
      ``virus_type``.

    Rows whose family is a key of ``virus_type`` produce no output. An empty
    file and blank lines are skipped silently.

    Args:
        filepath: Path to the CSV file to check.
        virus_type: Mapping keyed by family name; only key membership is
            tested.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If a non-blank row has no ``Family`` or ``Species`` value
            (column missing from the header, or the row is too short).
    """
    import csv

    # newline="" is what the csv module expects so that it can handle line
    # endings (including newlines inside quoted fields) itself. The ``with``
    # block guarantees the handle is closed even if a row raises.
    with open(filepath, "r", newline="") as infile:
        infile_csv = csv.reader(infile)
        headers = next(infile_csv, None)
        if headers is None:
            # Completely empty file: nothing to check.
            return
        for row in infile_csv:
            if not row:
                # csv.reader yields [] for blank lines; they carry no record.
                continue
            items = dict(zip(headers, row))
            family = items["Family"]
            if family == "":
                print(items["Species"] + ": No family")
            elif family not in virus_type:
                print(items["Species"] + ": Not in list")


if __name__ == "__main__":
    # Script entry point: load the family -> virus-type lookup from the JSON
    # file first, then print every CSV row that cannot be classified with it.
    json_path = "/Users/genesis/Downloads/download.json"
    csv_path = "/Users/genesis/Downloads/ncbi_human_host_virus.csv"
    check_no_type(csv_path, get_virus_type(json_path))
